# mTurk maker 
################################################################################
# Dependencies
################################################################################
library(data.table)
library(purrr)
library(lubridate)
library(readr)
library(dplyr)
library(readr)
library(readxl)
library(irr)
library(tidyr)
library(ggplot2)
library(tidyverse)
library(tidycomm)
################################################################################
# Setup
################################################################################
# NOTE(review): wiping the global environment from inside a script is
# discouraged; kept so the script's start-up behavior stays as before.
rm(list = ls())

# - set dir
# Work out where this script lives so that the relative paths used below
# resolve regardless of the directory the script is launched from.
args <- commandArgs()

# When launched through Rscript the script location arrives as `--file=<path>`.
scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  # No `--file=` argument: fall back to the file open in the RStudio editor.
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Strip the 7-character `--file=` prefix.
  scriptName <- substr(scriptName, 8, nchar(scriptName))
}

# dirname() replaces the previous strsplit()/substr() arithmetic, which
# produced NA (and made setwd() fail) when the script path contained no
# directory separator, e.g. `Rscript script.R`. The trailing separator is
# appended so `pathName` keeps its previous "directory/" shape.
pathName <- paste0(dirname(scriptName), "/")

setwd(pathName)

# Whole content of the pattern file as one string.
# NOTE(review): presumably a regex matching emoji; a trailing newline in the
# file would become part of the pattern -- confirm.
emo_vec <- read_file("./emoji_regex_vector.txt")

# Replace every match of a pattern inside `string`.
#
# Arguments:
#   string     - input handed to stringr::str_replace_all() as `string`.
#   replacment - replacement text (the spelling of the argument name is kept
#                so existing calls that pass it by name keep working).
#   emo_vec    - pattern handed to stringr::str_replace_all() as `pattern`.
#
# Returns whatever stringr::str_replace_all() returns for these arguments.
emojy_replace <- function(string, replacment, emo_vec) {
  stringr::str_replace_all(
    string = string,
    pattern = emo_vec,
    replacement = replacment
  )
}
################################################################################
# Relevance
################################################################################

# Training data for the relevance task.
# NOTE(review): `mturk_mael` is not referenced again in the visible part of
# this script -- confirm it is still needed.
mturk_mael <- read_csv("../data/mTurk_data/training_data_newsarticles_relevance.csv")


# mTurk Results
# The listed columns are forced to character so readr does not guess a type
# for them.
df <- read_delim(
  "../data/mTurk_data/batch_results_newsarticles_relevance_final.csv",
  delim = ";",
  col_types = cols(
    ApprovalTime = col_character(),
    RejectionTime = col_character(),
    RequesterFeedback = col_character(),
    Approve = col_character(),
    Reject = col_character()
  )
)

# Keep only rows that carry an RA code and turn the code into a text label so
# it can be compared with the workers' `Answer.category.label` further down.
# The non-relevant label used to be spelled "Irrelevnt"; it is now spelled
# "Irrelevant".
# NOTE(review): assumes the worker labels are "Relevant" / "Irrelevant" --
# the check below warns if that assumption does not hold.
df <- df %>%
  filter(!is.na(Input.relevant_ra)) %>%
  mutate(
    Input.relevant_ra = ifelse(Input.relevant_ra == 1, "Relevant", "Irrelevant")
  )

# Sanity check: an RA label that never occurs among the worker answers can
# never be matched and usually means the two label sets are spelled differently.
local({
  ra_only_labels <- setdiff(
    unique(df$Input.relevant_ra),
    unique(df$Answer.category.label)
  )
  if (length(ra_only_labels) > 0) {
    warning(
      "RA label(s) never used by any worker: ",
      paste(ra_only_labels, collapse = ", "),
      call. = FALSE
    )
  }
})

# Quality of mTurk Workers
# For every worker: share of answers equal to the RA label (`quality`) and the
# number of rows answered (`n_questions`), using only rows where
# Input.relevance_agreement_check == 1.
# A missing answer makes the comparison NA, which propagates into `quality`.
worker_qual <- df %>%
  filter(Input.relevance_agreement_check == 1) %>%
  mutate(
    worker_vs_ra = as.numeric(Answer.category.label == Input.relevant_ra)
  ) %>%
  group_by(WorkerId) %>%
  summarise(
    quality = sum(worker_vs_ra) / n(),
    n_questions = n()
  )

summary(worker_qual$quality)

# Answered rows with Input.relevance_agreement_check == 1, with a pseudo coder
# id attached.
#
# The pseudo coder id is numbered *within* each Input.id (1 = first answer of
# that unit, 2 = second answer, ...). Previously the ids alternated 1, 2, 1, 2
# over the whole table by row position, so two answers for the same unit that
# happened to sit on same-parity rows received the same id and one of them was
# then discarded by distinct().
# Only the first two answers per unit are kept, because the accuracy step
# further down compares the columns `1` and `2`.
batch1_real <- df %>%
  filter(
    Input.relevance_agreement_check == 1,
    !is.na(Answer.category.label)
  ) %>%
  group_by(Input.id) %>%
  mutate(pseudo_coder_id = row_number()) %>%
  ungroup() %>%
  filter(pseudo_coder_id <= 2L)

head(batch1_real$Input.id)

batch1_real <- as_tibble(batch1_real)

# Agreement over all coders...
batch1_real %>%
  test_icr(
    unit_var = Input.id,
    coder_var = pseudo_coder_id,
    Answer.category.label
  )
batch1_real %>% tab_frequencies(Answer.category.label)


# Accuracy over all
# One row per unit with the two pseudo coders' answers in columns `1` and `2`.
# `relevant_mturk` is the shared answer when both coders agree; on
# disagreement it receives the FALSE placeholder, which never equals an RA
# label. `mturk_vs_ra` flags whether the agreed answer equals the RA label
# (NA when either coder column is missing).
batch_accuraacy <- batch1_real %>%
  select(
    Input.id,
    Input.relevance_agreement_check,
    Input.relevant_ra,
    Answer.category.label,
    pseudo_coder_id
  ) %>%
  pivot_wider(
    names_from = pseudo_coder_id,
    values_from = Answer.category.label
  ) %>%
  mutate(
    relevant_mturk = ifelse(`1` == `2`, `1`, FALSE),
    mturk_vs_ra = relevant_mturk == Input.relevant_ra
  )

# Counts and shares of units by whether the mTurk answer matched the RA label.
batch_accuraacy %>%
  count(mturk_vs_ra) %>%
  mutate(f = n / sum(n))

################################################################################
# Problem Solution
################################################################################
# Training data for the problem/solution task.
# NOTE(review): `mturk_mael` is not referenced again in the visible part of
# this script -- confirm it is still needed.
mturk_mael <- read_csv("../data/mTurk_data/training_data_newsarticles_problem_solution_final.csv")


# mTurk Results
# Read the batch results (the listed columns are forced to character) and keep
# only the rows that carry an RA code.
df <- read_delim(
  "../data/mTurk_data/batch_results_newsarticles_problem_solution_final.csv",
  delim = ";",
  col_types = cols(
    ApprovalTime = col_character(),
    RejectionTime = col_character(),
    RequesterFeedback = col_character(),
    Approve = col_character(),
    Reject = col_character()
  )
) %>%
  filter(!is.na(Input.problem_solution_ra))

# Show which RA codes occur.
unique(df$Input.problem_solution_ra)

# Quality of mTurk Workers
# For every worker: share of answers equal to the RA code (`quality`) and the
# number of rows answered (`n_questions`).
# A missing answer makes the comparison NA, which propagates into `quality`.
# NOTE(review): no filter on Input.problem_solution_agreement_check is applied
# here -- confirm that is intended.
worker_qual <- df %>%
  mutate(
    worker_vs_ra = as.numeric(
      Answer.category.label == Input.problem_solution_ra
    )
  ) %>%
  group_by(WorkerId) %>%
  summarise(
    quality = sum(worker_vs_ra) / n(),
    n_questions = n()
  )

summary(worker_qual$quality)

# Answered rows with a pseudo coder id attached.
#
# The pseudo coder id is numbered *within* each Input.id (1 = first answer of
# that unit, 2 = second answer, ...). Previously the ids alternated 1, 2, 1, 2
# over the whole table by row position, so two answers for the same unit that
# happened to sit on same-parity rows received the same id and one of them was
# then discarded by distinct().
# Only the first two answers per unit are kept, because the accuracy step
# further down compares the columns `1` and `2`.
batch1_real <- df %>%
  filter(!is.na(Answer.category.label)) %>%
  group_by(Input.id) %>%
  mutate(pseudo_coder_id = row_number()) %>%
  ungroup() %>%
  filter(pseudo_coder_id <= 2L)

head(batch1_real$Input.id)

batch1_real <- as_tibble(batch1_real)

# Agreement over all coders...
batch1_real %>%
  test_icr(
    unit_var = Input.id,
    coder_var = pseudo_coder_id,
    Answer.category.label
  )
batch1_real %>% tab_frequencies(Answer.category.label)


# Accuracy over all
# One row per unit with the two pseudo coders' answers in columns `1` and `2`.
# `relevant_mturk` is the shared answer when both coders agree; on
# disagreement it receives the FALSE placeholder. `mturk_vs_ra` flags whether
# the agreed answer equals the RA code (NA when either coder column is
# missing).
batch_accuraacy <- batch1_real %>%
  select(
    Input.id,
    Input.problem_solution_agreement_check,
    Input.problem_solution_ra,
    Answer.category.label,
    pseudo_coder_id
  ) %>%
  pivot_wider(
    names_from = pseudo_coder_id,
    values_from = Answer.category.label
  ) %>%
  mutate(
    relevant_mturk = ifelse(`1` == `2`, `1`, FALSE),
    mturk_vs_ra = relevant_mturk == Input.problem_solution_ra
  )

# Counts and shares of units by whether the mTurk answer matched the RA code.
batch_accuraacy %>%
  count(mturk_vs_ra) %>%
  mutate(f = n / sum(n))
